In [518]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import numpy as np
import plotly.io as pio
pio.renderers.default ='notebook'

Lionel Messi Data Analysis¶

Lionel Andres Messi started his career in 2003, scoring a total of 701 goals playing for different Clubs, not counting the Argentinian National Team. The Follow dataset analyses and presents that information along different charts and graphs.

In [494]:
df = pd.read_csv('messi.csv')
In [495]:
df['Date'] = pd.to_datetime(df['Date'])
df['Goles'] = 1

How many goals has Messi scored along different tournaments¶

In [496]:
uno = df.groupby(['Tournament']).count()
In [519]:
colors = px.colors.cyclical.Phase
fig = px.pie(df, names='Tournament', values='Goles', height=800, width=1020,color_discrete_sequence= colors)
fig.update_traces(hoverinfo='label+percent', textinfo='value', textfont_size=15,
                  marker=dict(colors=colors, line=dict(color='#000000', width=1)))
fig.update_layout(uniformtext_minsize=12, uniformtext_mode='hide' ,title_text='Goals Scored by Tournament',title_x=0.45)
fig.show()
In [520]:
color = px.colors.qualitative.Set1
fig = px.histogram(df,y=df.Goles.sort_values(), x='Tournament', height=800, width=1020, barmode='relative',text_auto  = True,title='Goals scored by Tournament',color='Tournament',
color_discrete_sequence= color).update_xaxes(categoryorder="total descending")
fig.show()
In [515]:
# minmatch = pd.DataFrame()
# minmatch['Tournament'] = df2['Tournament'] 
# minmatch['MinuteNum2'] = df2['MinuteNum2']
In [517]:
# ser = df2.groupby(['Tournament','MinuteNum2']).Goles.count()
# ser = ser.unstack().fillna(0).astype('int')
In [501]:
df2 = df.copy()
In [502]:
df2['MinuteNum'] = df2['Minute']
In [528]:
df2['MinuteNum'] = df2['MinuteNum'].str.replace('[^0-9]', '',regex=True).astype('int64')
In [568]:
df2['MinuteNum2'] = df2['MinuteNum'] 
df2.loc[df2['MinuteNum2'] > 90, 'MinuteNum2'] = 93

Minute of the game when Messi scored all his 701 goals¶

In [542]:
ordernumb = df2.groupby(['MinuteNum2'])['Tournament'].count().to_frame().reset_index()
ordernumb = ordernumb.rename(columns={'MinuteNum2': 'Minute', 'Tournament': 'Goals Scored'})
ordernumb2 = ordernumb.copy()
ordernumb2
Out[542]:
Minute Goals Scored
0 2 1
1 3 4
2 4 4
3 5 8
4 6 2
... ... ...
85 87 13
86 88 9
87 89 9
88 90 12
89 93 33

90 rows × 2 columns

In [536]:
fig = px.bar(ordernumb2, x='Minute', y= 'Goals Scored', height=600, width=1020, title='Minute of the game when Messi scored' \
        ,color='Goals Scored', text='Goals Scored')
fig.update_traces(texttemplate='%{text:.1.5s}', textposition='outside')
# fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
fig.show()

The data shows that during his career, he has scored more goals (14) at minute 55', then at minute 78' and 87' he has 13, being the second and the third largest amount 87'. All the goals converted during the extra time have been summed and totalized aside, with a total of 33.

BoxPlot showing mean, median and quarter in minutes when Messi scores (By Tournament)¶

In [613]:
color = px.colors.qualitative.Set1
fig = px.box(df2, y='MinuteNum2', x="Tournament",color="Tournament", color_discrete_sequence= color, boxmode= "overlay",height=600, width=1020 )
fig.show()

The graph shown before indicates the mean, median and quarter when Messi has scored all his goals, divided also by Tournament.

Which team has received most of Messis Goals (Top 25)¶

In [507]:
rival = df2.groupby(['Opponent'])['Goles'].count().sort_values(ascending=False).to_frame('Total').head(25).reset_index()
rival
Out[507]:
Opponent Total
0 Sevilla FC 38
1 Atlético de Madrid 32
2 Valencia CF 31
3 Athletic Bilbao 29
4 Real Betis Balompié 26
5 Real Madrid 26
6 CA Osasuna 25
7 RCD Espanyol Barcelona 25
8 Levante UD 24
9 Getafe CF 21
10 Deportivo de La Coruña 20
11 SD Eibar 20
12 Rayo Vallecano 18
13 Real Sociedad 18
14 RCD Mallorca 16
15 Villarreal CF 16
16 Granada CF 15
17 Real Zaragoza 14
18 Celta de Vigo 14
19 Málaga CF 13
20 UD Almería 13
21 Deportivo Alavés 13
22 Racing Santander 12
23 CD Leganés 11
24 Arsenal FC 9
In [525]:
fig = px.bar(rival,height=800, width=1020,y='Total', x='Opponent',color='Opponent', color_continuous_scale=px.colors.qualitative.Set2, text='Total', title='Which team has received most of Messis Goals (Top 25)')
fig.update_traces(texttemplate='%{text:1.3.s}', textposition='outside')
fig.update_xaxes(tickangle=45)
fig.show()

Sevilla, Atletico de Madrid and Valencia FC were the teams more affected by Messi with a total of 38, 32 and 31 goals suffered by the argentinian striker

Amount of goals scored during Messi's career (Total and by Tournament)¶

In [509]:
df2['Date'] = df2['Date'].apply(pd.Timestamp)
df2['Date_M'] = df2['Date'].apply(pd.Timestamp)
df2['Date_M'] = pd.to_datetime(df2['Date_M']).dt.to_period('M')
df2['Date_Y'] = df2['Date'].dt.year
In [614]:
years = df2.groupby(['Date_Y'])['Goles'].count().to_frame('Total').reset_index()
In [617]:
fig = px.line(years, x="Date_Y", y='Total', text="Total",height=700, width=1100, title='All Goals scored by Messi during his Career (Total)')
fig.update_traces(textposition="bottom right")
fig.update_layout(
   xaxis = dict(
      tickmode = 'linear',
      tick0 = 1 
   )
)
fig.show()

During the period of 2003 to 2022, the year 2012 was the best for Leo Messi, scoring a total of 79 goals just playing for Barcelona, not taking in consideration his performance at the Argentinean National Team.

In [615]:
years2 = df2.groupby(['Date_Y','Tournament'])['Goles'].count().to_frame('Total').reset_index()
years2
Out[615]:
Date_Y Tournament Total
0 2004 2ª B - Grupo III 5
1 2005 2ª B - Grupo III 1
2 2005 Champions League 1
3 2005 LaLiga 2
4 2006 Champions League 1
... ... ... ...
64 2021 LaLiga 23
65 2021 Ligue 1 1
66 2022 Champions League 4
67 2022 Ligue 1 12
68 2022 Trophée des Champions 1

69 rows × 3 columns

In [616]:
fig = px.line(years2, x="Date_Y", y='Total', text="Total",height=700, width=1100,color='Tournament',markers=True, title='Goals scored by Messi during his Career (By Tournament)')
fig.update_traces(textposition="bottom right")
fig.update_layout(
   xaxis = dict(
      tickmode = 'linear',
      tick0 = 1 
   )
)
fig.show()
In [ ]: